library(synapser)
library(tidyverse)
synLogin()
## Welcome, Robert Allaway!
## NULL
library(circlize)
# get counts file
counts <- synGet("syn29532377", version = 2)$path %>% readr::read_tsv()
# get metadata
query <- "SELECT * FROM syn11601459 where assay = 'rnaSeq' and fileFormat = 'sf' and name = 'quant.sf'"
metadata <- synTableQuery(query)$asDataFrame()
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 10.5kB/10.5kB (6.0MB/s) SYNAPSE_TABLE_QUERY_106582141.csv Done...
counts_tidy <- counts %>%
group_by(gene_id) %>%
gather(contains("NF"), key = "cell_line", value = "counts") %>%
rename(ensembl_gene_id_version = gene_id) %>%
ungroup()
#source https://www.biostars.org/p/44426/
library(biomaRt)
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
## Ensembl site unresponsive, trying asia mirror
## Ensembl site unresponsive, trying uswest mirror
## Ensembl site unresponsive, trying useast mirror
gene_list <- getBM(attributes = c("ensembl_gene_id_version", "chromosome_name", "start_position", "end_position"),
values = counts$gene_id,
filter = "ensembl_gene_id_version",
mart = mart)
counts_tidy_2 <- inner_join(counts_tidy, gene_list) %>%
filter(!cell_line %in% c("hTERT_NF1_ipNF05.5","hTERT_NF1_ipNF95.11b_C","hTERT_NF1_ipNF95.6"))
## Joining, by = "ensembl_gene_id_version"
##no matched cnv data for pnf, remove these
query <- synTableQuery("SELECT * FROM syn35928271.2")$asDataFrame()
##
[####################]100.00% 1/1 Done...
Downloading [####################]100.00% 4.1kB/4.1kB (11.3MB/s) SYNAPSE_TABLE_QUERY_106582322.csv Done...
bed_ents <- lapply(query$id, synGet)
beds <- lapply(bed_ents, function(x){
readr::read_delim(x$path, col_names = c('chr','start','end','value1'))
})
## Rows: 3770 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3912 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3451 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3670 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3682 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3668 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3321 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3733 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4023 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4030 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3943 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3991 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3267 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4422 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
bed_names <- lapply(bed_ents, function(x) {stringr::str_remove(x$properties$name, ".bed")})
names(beds) <- bed_names
reorder_beds <- c("28cNF", "i28cNF", "cNF04.9a", "icNF04.9a", "cNF00.10a", "icNF00.10a", "cNF97.2a", "icNF97.2a", "cNF97.2b", "icNF97.2b", "cNF98.4c", "icNF98.4c", "cNF98.4d", "icNF98.4d")
beds <- beds[reorder_beds]
for(i in unique(counts_tidy_2$cell_line)){
counts_bed <- counts_tidy_2 %>% filter(cell_line == i) %>%
dplyr::select(chromosome_name, start_position, end_position, counts) %>%
set_names(c("chr", "start", "end", "value1")) %>%
mutate(chr = paste0("chr", chr)) %>%
mutate(value1 = log10(value1+1))
cnv_bed <- beds[[i]] %>%
mutate(value1 = log10(value1+1))
circos.initializeWithIdeogram(species = "hg38")
circos.genomicTrackPlotRegion(counts_bed, numeric.column = c("value1"),
panel.fun = function(region, value, ...) {
circos.genomicPoints(region, value, pch = 20, cex = 0.1, col = '#003554')
})
circos.genomicTrackPlotRegion(cnv_bed, numeric.column = c("value1"),
panel.fun = function(region, value, ...) {
circos.genomicLines(region, value, col = "#FC6471", area = T)
})
title(i)
}
Plot again, but this time to save PDFs.
for(i in unique(counts_tidy_2$cell_line)){
counts_bed <- counts_tidy_2 %>% filter(cell_line == i) %>%
dplyr::select(chromosome_name, start_position, end_position, counts) %>%
set_names(c("chr", "start", "end", "value1")) %>%
mutate(chr = paste0("chr", chr)) %>%
mutate(value1 = log10(value1+1))
cnv_bed <- beds[[i]] %>%
mutate(value1 = log10(value1+1))
filename <- glue::glue("../figures/{i}_cnv_circos.pdf")
pdf(filename)
circos.initializeWithIdeogram(species = "hg38")
circos.genomicTrackPlotRegion(counts_bed, numeric.column = c("value1"),
panel.fun = function(region, value, ...) {
circos.genomicPoints(region, value, pch = 20, cex = 0.1, col = '#003554')
})
circos.genomicTrackPlotRegion(cnv_bed, numeric.column = c("value1"),
panel.fun = function(region, value, ...) {
circos.genomicLines(region, value, col = "#FC6471", area = T)
})
title(i)
dev.off()
}